from wordcloud import WordCloud
import matplotlib.pyplot as plt
import pandas as pd

# Load the dataset; it must contain the cluster label column and every
# feature column listed in `feature_columns` below.
df = pd.read_csv("modified_csv_file.csv")
# SimHei is a CJK-capable font; presumably some rendered text is Chinese.
plt.rcParams['font.family'] = 'SimHei'

# Group the dataset by the clustering label
grouped = df.groupby('tKMeans_Label')

# Feature columns whose most frequent value is reported for each cluster
feature_columns = ['Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q10', 'Q11', 'Q12', 'Q13', 'Q14']

# For each cluster: print the most common value of every feature column and
# render those values as a word cloud.
for group_label, group_data in grouped:
    print(f"Cluster {group_label}:")

    # Most common value (as text) of each feature that has data in this cluster
    top_values = []

    for col in feature_columns:
        # value_counts() drops NaN by default and sorts by descending count,
        # so the first index entry is the most frequent non-null value.
        feature_counts = group_data[col].value_counts()
        if feature_counts.empty:
            # Every value of this column is missing in this cluster; indexing
            # [0] would raise IndexError, so report it and move on.
            print(f"Feature: {col}, Most Common Value: (no data)")
            continue
        most_common_value = feature_counts.index[0]
        top_values.append(f"{most_common_value}")
        print(f"Feature: {col}, Most Common Value: {most_common_value}")

    print()

    # Space-separated words are what WordCloud.generate() tokenizes.
    text = " ".join(top_values)
    if not text.strip():
        # WordCloud.generate() raises ValueError when given no words.
        print(f"Cluster {group_label}: no feature values available, skipping word cloud")
        print()
        continue

    # font_path points at a font file that must be resolvable at runtime;
    # it is needed so non-Latin glyphs render inside the cloud.
    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        font_path='STSONG.ttf',
    ).generate(text)

    # Display the word cloud
    plt.figure(figsize=(10, 6))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.title(f'Most Common Features in Cluster {group_label}')
    plt.show()